This project uses images of daisies, dandelions, roses, sunflowers, and tulips from Flickr and Google Images in an attempt to predict which species a flower belongs to from a 224x224-pixel image.
library(keras)
library(tensorflow)

# Allow GPU memory to grow on demand instead of pre-allocating the whole
# card. Guard against machines with no visible GPU so the script still
# runs (slowly) on CPU.
gpus <- tf$config$experimental$get_visible_devices('GPU')
if (length(gpus) > 0) {
  tf$config$experimental$set_memory_growth(device = gpus[[1]], enable = TRUE)
}

# Dataset layout: train/validation/test subdirectories, one folder per class.
base_dir <- "C:/Users/ian/Documents/Projects/flowers_final"
train_dir <- file.path(base_dir, "train")
validation_dir <- file.path(base_dir, "validation")
test_dir <- file.path(base_dir, "test")
3120 training images
600 validation images
600 test images
768 daisies
1049 dandelions
784 roses
734 sunflowers
984 tulips
Images were split into an 80% training set and a 20% test set; the training set was then split again at the same ratio to create the validation set.
# Stream batches of images from disk, rescaling pixel values from
# [0, 255] to [0, 1]. No augmentation at this stage.
train_datagen <- image_data_generator(rescale = 1 / 255)
validation_datagen <- image_data_generator(rescale = 1 / 255)

train_generator <- flow_images_from_directory(
  directory = train_dir,
  generator = train_datagen,
  target_size = c(224, 224),
  batch_size = 20,
  class_mode = "categorical"
)

validation_generator <- flow_images_from_directory(
  directory = validation_dir,
  generator = validation_datagen,
  target_size = c(224, 224),
  batch_size = 20,
  class_mode = "categorical"
)
# Pull one batch from the training generator to inspect its structure:
# a list of (image tensor, label matrix).
batch <- generator_next(train_generator)
str(batch)
## List of 2
## $ : num [1:20, 1:224, 1:224, 1:3] 0.847 0 0.584 1 0.227 ...
## $ : num [1:20, 1:5] 1 0 0 0 0 0 0 1 0 0 ...
head(batch[[2]]) # labels are a one-hot encoded matrix with one column for each class
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 0 0 0 0
## [2,] 0 1 0 0 0
## [3,] 0 0 0 1 0
## [4,] 0 0 0 1 0
## [5,] 0 0 0 0 1
## [6,] 0 1 0 0 0
Some images in the dataset have people or bugs in the way of the flowers; however, most photos have the flower in the foreground.
# Display the first eight images of the batch in a 2x4 grid.
imgs <- batch[[1]][1:8, , , ]
str(imgs)
## num [1:8, 1:224, 1:224, 1:3] 0.847 0 0.584 1 0.227 ...
{
  op <- par(mfrow = c(2, 4), pty = "s", mar = c(1, 0, 1, 0))
  for (idx in seq_len(8)) {
    plot(as.raster(imgs[idx, , , ]))
  }
  par(op)
}
We begin by building a network with 3 convolution and max pooling layers before flattening and sending the output to a densely connected layer.
# A small CNN trained from scratch: four conv/max-pool stages followed
# by a flatten and a dense classifier over the five flower classes.
model <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
                input_shape = c(224, 224, 3)) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_flatten() %>%
  layer_dense(units = 256, activation = "relu") %>%
  layer_dense(units = 5, activation = "softmax")

model %>% compile(
  loss = "categorical_crossentropy",
  # `lr` is deprecated in current keras releases; `learning_rate` is the
  # supported argument name.
  optimizer = optimizer_adam(learning_rate = 1e-4),
  metrics = c("accuracy")
)
model
## Model
## Model: "sequential"
## ________________________________________________________________________________
## Layer (type) Output Shape Param #
## ================================================================================
## conv2d (Conv2D) (None, 222, 222, 32) 896
## ________________________________________________________________________________
## max_pooling2d (MaxPooling2D) (None, 111, 111, 32) 0
## ________________________________________________________________________________
## conv2d_1 (Conv2D) (None, 109, 109, 64) 18496
## ________________________________________________________________________________
## max_pooling2d_1 (MaxPooling2D) (None, 54, 54, 64) 0
## ________________________________________________________________________________
## conv2d_2 (Conv2D) (None, 52, 52, 64) 36928
## ________________________________________________________________________________
## max_pooling2d_2 (MaxPooling2D) (None, 26, 26, 64) 0
## ________________________________________________________________________________
## conv2d_3 (Conv2D) (None, 24, 24, 128) 73856
## ________________________________________________________________________________
## max_pooling2d_3 (MaxPooling2D) (None, 12, 12, 128) 0
## ________________________________________________________________________________
## flatten (Flatten) (None, 18432) 0
## ________________________________________________________________________________
## dense (Dense) (None, 256) 4718848
## ________________________________________________________________________________
## dense_1 (Dense) (None, 5) 1285
## ================================================================================
## Total params: 4,850,309
## Trainable params: 4,850,309
## Non-trainable params: 0
## ________________________________________________________________________________
model %>% save_model_hdf5('base_model.h5') # snapshot the freshly compiled (untrained) model
The model appears to overfit the dataset very quickly; this is likely due to the small number of training images.
model <- load_model_hdf5('base_model.h5')

# `fit_generator()` is deprecated; `fit()` accepts generators directly.
# 156 steps/epoch x batch size 20 covers the ~3120 training images.
history <- model %>% fit(
  train_generator,
  steps_per_epoch = 156,
  epochs = 30,
  validation_data = validation_generator,
  validation_steps = 30,
  verbose = 0
)
plot(history)
## `geom_smooth()` using formula 'y ~ x'
model %>% save_model_hdf5('base_model.h5')
To improve model performance we can try augmenting the images. Here we create a data pipeline to feed batches of the photos to the model rather than all at once to save memory and compute more efficiently.
# Augmented training pipeline: random rotations, shifts, shears, zooms,
# and horizontal flips on top of the usual [0, 1] rescaling. Validation
# images are only rescaled, never augmented.
datagen <- image_data_generator(
  rescale = 1 / 255,
  rotation_range = 40,
  width_shift_range = 0.2,
  height_shift_range = 0.2,
  shear_range = 0.2,
  zoom_range = 0.2,
  horizontal_flip = TRUE
)
test_datagen <- image_data_generator(rescale = 1 / 255)

train_generator <- flow_images_from_directory(
  directory = train_dir,
  generator = datagen,
  target_size = c(224, 224),
  batch_size = 20,
  class_mode = "categorical"
)
validation_generator <- flow_images_from_directory(
  directory = validation_dir,
  generator = test_datagen,
  target_size = c(224, 224),
  batch_size = 20,
  class_mode = "categorical"
)
# Build the tulip directory from `train_dir` rather than repeating the
# absolute path, then pick one image to demonstrate augmentation.
train_tulips_dir <- file.path(train_dir, "tulip")
fnames <- list.files(train_tulips_dir, full.names = TRUE)
img_path <- fnames[[3]]

# Load the image and reshape it to a single-sample 4D tensor.
img <- image_load(img_path, target_size = c(224, 224))
img_array <- image_to_array(img)
img_array <- array_reshape(img_array, c(1, 224, 224, 3))

# Generator that yields random augmentations of this one image.
augmentation_generator <- flow_images_from_data(
  img_array,
  generator = datagen,
  batch_size = 1
)
# Show four random augmentations of the same tulip image in a 2x2 grid.
{
  op <- par(mfrow = c(2, 2), pty = "s", mar = c(1, 0, 1, 0))
  for (k in seq_len(4)) {
    batch <- generator_next(augmentation_generator)
    plot(as.raster(batch[1, , , ]))
  }
  par(op)
}
Here I reuse the model from before; it appears to give decent results compared to both smaller and larger models.
# Same architecture as the base model, re-initialized so it can be
# trained from scratch on the augmented data.
model <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
                input_shape = c(224, 224, 3)) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_flatten() %>%
  layer_dense(units = 256, activation = "relu") %>%
  layer_dense(units = 5, activation = "softmax")

model %>% compile(
  loss = "categorical_crossentropy",
  # `lr` is deprecated in current keras releases; `learning_rate` is the
  # supported argument name.
  optimizer = optimizer_adam(learning_rate = 1e-4),
  metrics = c("accuracy")
)
model
## Model
## Model: "sequential_1"
## ________________________________________________________________________________
## Layer (type) Output Shape Param #
## ================================================================================
## conv2d_4 (Conv2D) (None, 222, 222, 32) 896
## ________________________________________________________________________________
## max_pooling2d_4 (MaxPooling2D) (None, 111, 111, 32) 0
## ________________________________________________________________________________
## conv2d_5 (Conv2D) (None, 109, 109, 64) 18496
## ________________________________________________________________________________
## max_pooling2d_5 (MaxPooling2D) (None, 54, 54, 64) 0
## ________________________________________________________________________________
## conv2d_6 (Conv2D) (None, 52, 52, 64) 36928
## ________________________________________________________________________________
## max_pooling2d_6 (MaxPooling2D) (None, 26, 26, 64) 0
## ________________________________________________________________________________
## conv2d_7 (Conv2D) (None, 24, 24, 128) 73856
## ________________________________________________________________________________
## max_pooling2d_7 (MaxPooling2D) (None, 12, 12, 128) 0
## ________________________________________________________________________________
## flatten_1 (Flatten) (None, 18432) 0
## ________________________________________________________________________________
## dense_2 (Dense) (None, 256) 4718848
## ________________________________________________________________________________
## dense_3 (Dense) (None, 5) 1285
## ================================================================================
## Total params: 4,850,309
## Trainable params: 4,850,309
## Non-trainable params: 0
## ________________________________________________________________________________
model %>% save_model_hdf5('aug_model.h5') # snapshot the freshly compiled augmented-data model
The results here look a lot better: the overfitting is not as severe as before, and we get a modest increase in accuracy.
model <- load_model_hdf5('aug_model.h5')

# `fit_generator()` is deprecated; `fit()` accepts generators directly.
history <- model %>% fit(
  train_generator,
  steps_per_epoch = 156,
  epochs = 30,
  validation_data = validation_generator,
  validation_steps = 30,
  verbose = 0
)
model %>% save_model_hdf5('aug_model.h5')
plot(history)
## `geom_smooth()` using formula 'y ~ x'
Here we use a pretrained network for feature extraction and fine-tuning. This model uses the VGG16 architecture trained on the ImageNet dataset to increase the model's performance.
For clarity we re-define the data pipeline. In addition, I reduced the batch size to prevent crashing my GPU, since this model takes up a lot of memory.
# Re-define the augmented pipeline with batch size 1 so the much larger
# VGG16-based model fits in GPU memory. Validation images are only
# rescaled, never augmented.
datagen <- image_data_generator(
  rescale = 1 / 255,
  rotation_range = 40,
  width_shift_range = 0.2,
  height_shift_range = 0.2,
  shear_range = 0.2,
  zoom_range = 0.2,
  horizontal_flip = TRUE
)
test_datagen <- image_data_generator(rescale = 1 / 255)

train_generator <- flow_images_from_directory(
  directory = train_dir,
  generator = datagen,
  target_size = c(224, 224),
  batch_size = 1,
  class_mode = "categorical"
)
validation_generator <- flow_images_from_directory(
  directory = validation_dir,
  generator = test_datagen,
  target_size = c(224, 224),
  batch_size = 1,
  class_mode = "categorical"
)
First we freeze the weights in the VGG16 base, then we train the densely connected layer on top. Afterwards we unfreeze the last few layers of the VGG16 base and train again to fine-tune the model's performance.
# Transfer learning: VGG16 convolutional base pre-trained on ImageNet,
# topped with a small dense classifier for our five classes.
conv_base <- application_vgg16(
  weights = "imagenet",
  include_top = FALSE,
  input_shape = c(224, 224, 3)
)

model <- keras_model_sequential() %>%
  conv_base %>%
  layer_flatten() %>%
  layer_dense(units = 256, activation = "relu") %>%
  layer_dense(units = 5, activation = "softmax")

# Freeze the convolutional base (feature extraction) before compiling so
# the compiled trainable-weights list reflects the frozen state.
freeze_weights(conv_base)

model %>% compile(
  loss = "categorical_crossentropy",
  # `lr` is deprecated in current keras releases; `learning_rate` is the
  # supported argument name.
  optimizer = optimizer_adam(learning_rate = 2e-5),
  metrics = c("accuracy")
)
model
## Model
## Model: "sequential_2"
## ________________________________________________________________________________
## Layer (type) Output Shape Param #
## ================================================================================
## vgg16 (Model) (None, 7, 7, 512) 14714688
## ________________________________________________________________________________
## flatten_2 (Flatten) (None, 25088) 0
## ________________________________________________________________________________
## dense_4 (Dense) (None, 256) 6422784
## ________________________________________________________________________________
## dense_5 (Dense) (None, 5) 1285
## ================================================================================
## Total params: 21,138,757
## Trainable params: 6,424,069
## Non-trainable params: 14,714,688
## ________________________________________________________________________________
# Feature extraction: train only the dense head while the VGG16 base
# stays frozen. With batch size 1, steps_per_epoch counts individual
# images; validation_steps = 600 covers the whole validation set.
# `fit_generator()` is deprecated; `fit()` accepts generators directly.
history <- model %>% fit(
  train_generator,
  steps_per_epoch = 1320,
  epochs = 30,
  validation_data = validation_generator,
  validation_steps = 600,
  verbose = 0
)
plot(history)
## `geom_smooth()` using formula 'y ~ x'
# Fine tuning: unfreeze the top convolutional block of VGG16 and retrain
# at a lower learning rate so the pretrained weights are only gently
# adjusted.
unfreeze_weights(conv_base, from = "block5_conv1")
model %>% compile(
  loss = "categorical_crossentropy",
  # `lr` is deprecated; `learning_rate` is the supported argument name.
  optimizer = optimizer_adam(learning_rate = 1e-5),
  metrics = c("accuracy")
)
# `fit_generator()` is deprecated; `fit()` accepts generators directly.
history <- model %>% fit(
  train_generator,
  steps_per_epoch = 1320,
  epochs = 30,
  validation_data = validation_generator,
  validation_steps = 600,
  verbose = 0
)
plot(history)
## `geom_smooth()` using formula 'y ~ x'
model %>% save_model_hdf5('tuned_model.h5')
The final test accuracy is almost 90%; this is a massive improvement over the previous models we tried.
model <- load_model_hdf5('tuned_model.h5')

# Evaluate on the held-out test set, one image per step (600 images).
test_generator <- flow_images_from_directory(
  test_dir,
  test_datagen,
  target_size = c(224, 224),
  batch_size = 1,
  class_mode = "categorical"
)
# `evaluate_generator()` is deprecated; `evaluate()` accepts generators.
model %>% evaluate(test_generator, steps = 600)
## loss accuracy
## 0.4092226 0.8966666
Here we plot a few images and their corresponding prediction to verify the model is working.
It appears the model is indeed working and giving pretty impressive results; even with a limited number of training images we are still able to achieve a high level of accuracy. Additional tuning may be able to increase performance even more, but these were the best results I was able to obtain after a few hours of tuning on my personal computer.
# Stream one batch of six test images so individual predictions can be
# compared against their true labels.
test_gen <- flow_images_from_directory(
  directory = test_dir,
  generator = test_datagen,
  target_size = c(224, 224),
  batch_size = 6,
  class_mode = "categorical"
)
batch <- generator_next(test_gen)
test_imgs <- batch[[1]][1:6, , , ]
test_lab <- batch[[2]]
preds <- predict(model, test_imgs)
# Build a table of (top predicted probability, predicted class index,
# true class index) — vectorized instead of growing a data frame row by
# row. apply(..., which.max) returns the first maximum, matching the
# behavior of the original per-row loop; columns keep the default
# X1/X2/X3 names that data.frame(rbind(...)) produced.
n_imgs <- dim(test_imgs)[1]
pred_idx <- apply(preds[seq_len(n_imgs), , drop = FALSE], 1, which.max)
true_idx <- apply(test_lab[seq_len(n_imgs), , drop = FALSE], 1, which.max)
mat <- data.frame(
  X1 = preds[cbind(seq_len(n_imgs), pred_idx)],
  X2 = pred_idx,
  X3 = true_idx
)
# Map the numeric class indices in columns 2-3 to species names via a
# lookup vector instead of a nested if/else chain (which relied on
# implicit character coercion after the first replacement). Index order
# matches the generator's alphabetical class ordering.
class_names <- c("daisy", "dandelion", "rose", "sunflower", "tulip")
for (j in 2:3) {
  mat[[j]] <- class_names[as.integer(mat[[j]])]
}
Here we plot the predictions to verify they are indeed accurate.
# Label the columns and print the predictions next to the ground truth.
names(mat) = c('Pred Prob', 'Pred Val', 'True Val')
print(mat) # compare prediction with true value
## Pred Prob Pred Val True Val
## 1 1.0000000 sunflower sunflower
## 2 0.9999982 rose rose
## 3 0.9999924 rose rose
## 4 1.0000000 tulip tulip
## 5 0.9997993 tulip tulip
## 6 0.5749264 sunflower tulip
pred_class <- mat[, 2]
true_class <- mat[, 3]

# Plot each test image annotated with its predicted and true class.
{
  op <- par(mfrow = c(2, 3), pty = "s", mar = c(1, 0, 1, 0))
  for (i in seq_len(6)) {
    plot(as.raster(test_imgs[i, , , ]))
    text(x = 112, y = 210, labels = paste("Prediction:", pred_class[i]),
         col = 'black', cex = 1.5, font = 2)
    text(x = 112, y = 14, labels = paste("True Class:", true_class[i]),
         col = 'black', cex = 1.5, font = 2)
  }
  par(op)
}
We will run a test image of a rose through the first few layers in our model to plot the outputs. This will allow us to see what specific flower attributes are being picked up by the network.
model <- load_model_hdf5('base_model.h5')

# Build the sample path from `test_dir` instead of repeating the
# absolute path, then load and rescale the image to match training input.
img_path <- file.path(test_dir, "rose", "rose675.jpg")
img <- image_load(img_path, target_size = c(224, 224))
img_tensor <- image_to_array(img)
img_tensor <- array_reshape(img_tensor, c(1, 224, 224, 3))
img_tensor <- img_tensor / 255
dim(img_tensor)
## [1] 1 224 224 3
plot(as.raster(img_tensor[1,,,]))
# Build a model that exposes the outputs of the first eight layers so we
# can visualize intermediate activations for the sample image.
layer_outputs <- lapply(model$layers[1:8], function(l) l$output)
activation_model <- keras_model(inputs = model$input, outputs = layer_outputs)
activations <- activation_model %>% predict(img_tensor)

first_layer_activation <- activations[[1]]
dim(first_layer_activation)
## [1] 1 222 222 32
# Draw a single activation channel as an image. The matrix is rotated
# 90 degrees (transpose of column-reversed rows) so it displays in its
# natural orientation.
plot_channel <- function(channel) {
  rotate <- function(m) t(apply(m, 2, rev))
  image(rotate(channel), axes = FALSE, asp = 1, col = terrain.colors(12))
}
By plotting the intermediate activations we can see how the network is able to recognize roses: the first layers extract more general features, while the deeper layers extract attributes that are more abstract and less visually interpretable.
# Write one PNG per inspected layer, tiling every channel of that
# layer's activation in a grid of `images_per_row` columns.
image_size <- 64
images_per_row <- 16

for (i in seq_len(8)) {
  layer_activation <- activations[[i]]
  layer_name <- model$layers[[i]]$name
  n_features <- dim(layer_activation)[[4]]
  # Guard against layers with fewer channels than one full row: the
  # original integer division would give n_cols = 0 and par() would fail.
  n_cols <- max(1L, n_features %/% images_per_row)
  png(paste0("flower_activations_", i, "_", layer_name, ".png"),
      width = image_size * images_per_row,
      height = image_size * n_cols)
  op <- par(mfrow = c(n_cols, images_per_row), mai = rep_len(0.02, 4))
  for (col in 0:(n_cols - 1)) {
    for (row in 0:(images_per_row - 1)) {
      channel_index <- (col * images_per_row) + row + 1
      if (channel_index > n_features) next # partial final row
      plot_channel(layer_activation[1, , , channel_index])
    }
  }
  par(op)
  dev.off()
}
Activation Conv_2d_1
Activation Conv_2d_2
Activation Conv_2d_3
Activation Conv_2d_4